{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "distributed-gateway",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.datasets import load_iris\n",
    "from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ongoing-skirt",
   "metadata": {},
   "source": [
    "1.导入数据（10分）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "defined-drinking",
   "metadata": {},
   "outputs": [],
   "source": [
    "iris = load_iris()\n",
    "columns = iris.feature_names\n",
    "X = iris.data\n",
    "y = iris.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "clinical-realtor",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
       "       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
       "       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "occupied-charge",
   "metadata": {},
   "source": [
    "2.切分数据集（10分）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "changed-baking",
   "metadata": {},
   "outputs": [],
   "source": [
    "Xtrain,Xtest,Ytrain,Ytest = train_test_split(X,y,test_size = 0.3,random_state = 420)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ahead-digit",
   "metadata": {},
   "source": [
    "3.使用标准化包，对训练集来学习，从而对训练集和测试集来做标准化（20分）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "circular-collaboration",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression as LR \n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.preprocessing import StandardScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "postal-loading",
   "metadata": {},
   "outputs": [],
   "source": [
    "#对训练集和测试集做标准化-去量纲\n",
    "std  = StandardScaler().fit(Xtrain)\n",
    "Xtrain_ = std.transform(Xtrain)\n",
    "Xtest_ = std.transform(Xtest)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "retired-washington",
   "metadata": {},
   "source": [
    "4.在确定l2范式的情况下，使用网格搜索判断solver, C的最优组合（20分）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "editorial-script",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=5, error_score=nan,\n",
       "             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,\n",
       "                                          fit_intercept=True,\n",
       "                                          intercept_scaling=1, l1_ratio=None,\n",
       "                                          max_iter=10000, multi_class='auto',\n",
       "                                          n_jobs=None, penalty='l2',\n",
       "                                          random_state=None, solver='lbfgs',\n",
       "                                          tol=0.0001, verbose=0,\n",
       "                                          warm_start=False),\n",
       "             iid='deprecated', n_jobs=None,\n",
       "             param_grid={'C': [0.05, 0.102777777777777...\n",
       "                               0.3138888888888889, 0.36666666666666664,\n",
       "                               0.41944444444444445, 0.4722222222222222, 0.525,\n",
       "                               0.5777777777777778, 0.6305555555555556,\n",
       "                               0.6833333333333333, 0.7361111111111112,\n",
       "                               0.788888888888889, 0.8416666666666667,\n",
       "                               0.8944444444444445, 0.9472222222222223, 1.0],\n",
       "                         'solver': ['liblinear', 'sag', 'newton-cg', 'lbfgs']},\n",
       "             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n",
       "             scoring=None, verbose=0)"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#在l2范式下，判断c和solver的最优值-----必须以字典形式传入\n",
    "p = {\n",
    "    'C':list(np.linspace(0.05,1,19)),\n",
    "    'solver':['liblinear','sag','newton-cg','lbfgs']\n",
    "}\n",
    "\n",
    "model = LR(penalty='l2',max_iter=10000)\n",
    "\n",
    "GS = GridSearchCV(model,p,cv = 5)\n",
    "GS.fit(Xtrain_,Ytrain)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "periodic-austria",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9714285714285715"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "GS.best_score_   #最好的评分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "romantic-timothy",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'C': 0.41944444444444445, 'solver': 'sag'}"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "GS.best_params_   #最好评分下的参数"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "deadly-southeast",
   "metadata": {},
   "source": [
    "5.将最优的结果重新用来实例化模型，查看训练集和测试集下的分数（20分）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "assured-marine",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LogisticRegression(C=0.41944444444444445, class_weight=None, dual=False,\n",
       "                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,\n",
       "                   max_iter=10000, multi_class='auto', n_jobs=None,\n",
       "                   penalty='l2', random_state=None, solver='sag', tol=0.0001,\n",
       "                   verbose=0, warm_start=False)"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = LR(\n",
    "    penalty='l2',\n",
    "    max_iter=10000,\n",
    "    C = GS.best_params_['C'],\n",
    "    solver=GS.best_params_['solver'],\n",
    "    \n",
    ")\n",
    "model.fit(Xtrain_,Ytrain)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "fallen-determination",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.9714285714285714, 0.9555555555555556)"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.score(Xtrain_,Ytrain),model.score(Xtest_,Ytest)\n",
    "#训练集评分0.98,测试集评分0.96"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "attempted-midwest",
   "metadata": {},
   "source": [
    "6.计算精准率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "vietnamese-burst",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.38095238095238093"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn import metrics\n",
    "Ypre = model.predict(Xtrain)\n",
    "y_test_pre = model.predict(Xtest)\n",
    "metrics.precision_score(Ytrain,Ypre,average='micro')#训练集精准率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "worldwide-attachment",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.2222222222222222"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "metrics.precision_score(Ytest,y_test_pre,average='micro')#测试集精准率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "likely-papua",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
